the ranks will be a named num, with
entrezgene_id as name and stat (Wald Test) as
metric
getRanks <- function(res, annot) {
# only taking genes which have entrezgene_ids assigned to them
genes_with_entrez <- select(annot, GeneID, entrezgene_id) %>%
filter(!is.na(entrezgene_id))
ranks <- as.data.frame(res) %>%
tibble::rownames_to_column("GeneID") %>%
merge(genes_with_entrez, by = "GeneID") %>%
arrange(desc(stat)) %>%
select(entrezgene_id, stat) %>%
tibble::deframe() # creating a named num from two columns
return(ranks)
}
ranks.gastroc <- getRanks(res.gastroc, annot)
ranks.soleus <- getRanks(res.soleus, annot)
# TODO: why (again) is the soleus gene count seemingly 300 below soleus gene count / ranks count
# -> seems because of e.g. the cutoff at DESeq
duplicate entrezgene ids (multiple entrez are mapped to the same gene name)
Thus a quick look if any of these genes can be simply omitted. Like if the gene_type is “other” or “tRNA”
Since all ENSEMBL duplicates are also found in the gene_name duplicates, using the ENSEMBL as id for the ranks would reduce the number of duplicates by 107.
actually most of them are “protein coding” and will not be omitted. to be continued …
Pathways are provided by http://www.gsea-msigdb.org/gsea/msigdb/mouse/collections.jsp
For now the Canonical pathways are used. These gene sets represent biological a biological process. They are composed from the following databases taking a subset of CP:
| database | gene sets |
|---|---|
| BioCarta | 252 |
| Reactome | 1249 |
| WikiPathways | 186 |
fgseaRes <- fgsea(
pathways = CGP,
stats = ranks,
minSize = 15,
maxSize = 200
)
using NES from the fgsea result filtering on the set
`pCutoff=`0.01 yields the following plot:
pCutoff <- params$pCutoff
fgseaRes.combined <- merge(
data.frame(fgseaRes.gastroc[, c("pathway", "NES", "padj")]),
data.frame(fgseaRes.soleus[, c("pathway", "NES", "padj")]),
by = "pathway",
suffixes = c(".ga", ".sol")
) %>%
filter(padj.ga < pCutoff | padj.sol < pCutoff) %>%
mutate(
diff.exp = case_when(
NES.ga < 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both down",
NES.ga > 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "both up",
NES.ga < 0 & NES.sol > 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "gastrocnemius down, soleus up",
NES.ga > 0 & NES.sol < 0 & padj.ga < pCutoff & padj.sol < pCutoff ~ "gastrocnemius up, soleus down",
padj.ga < pCutoff & padj.sol > pCutoff ~ "only gastrocnemius",
# NES.ga > 0 & padj.ga < pCutoff & padj.sol > pCutoff ~ "ga up",
padj.ga > pCutoff & padj.sol < pCutoff ~ "only soleus",
# NES.sol > 0 & padj.ga > pCutoff & padj.sol < pCutoff ~ "sol up",
TRUE ~ "different"
)
)
# final plot
p <- ggplot(fgseaRes.combined, aes(x = NES.ga, y = NES.sol, text=pathway)) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_point(aes(color = diff.exp)) +
# scale_color_manual(values = c("red", "chartreuse1", "bisque", "royalblue")) +
labs(x = "gastrocnemius", y = "soleus", color = "significantly\ndifferentially\nexpressed") +
# ggrepel::geom_label_repel(max.overlaps = 20) +
ggtitle(label = "Normalized Enrichment Score") +
theme_bw()
ggsave(filename = file.path("plots", "04_gsea_scatter.svg"), p)
## Saving 7 x 5 in image
p
# interactive:
# plotly::ggplotly(p, tooltip = "all")
p <- ggplot(fgseaRes.combined, aes(x = diff.exp)) +
geom_bar(aes(fill = diff.exp)) +
theme_bw()
ggsave(filename = file.path("plots", "04_gsea_scatter_barplot.svg"), p)
## Saving 7 x 5 in image
p
venn.colors <- c(params$tissue_pal, scales::hue_pal()(4))
sign_geneSets_gastroc <-
data.frame(fgseaRes.gastroc[, c("pathway", "padj")]) %>%
filter(padj < params$pCutoff) %>%
pull(pathway)
sign_geneSets_soleus <-
data.frame(fgseaRes.soleus[, c("pathway", "padj")]) %>%
filter(padj < params$pCutoff) %>%
pull(pathway)
# only the two tissues
gene_sets <- list(
"gastroc" = sign_geneSets_gastroc,
"soleus" = sign_geneSets_soleus#,
# "gastroc&soleus" = sign_gene_stats$shared_sig_genes
)
# 1st option: euler two tissues
p <- plot(
euler(gene_sets),
quantities = T,
legend = list(side = "right"),
fills = venn.colors,
main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path("plots", "04_euler.svg"), p)
## Saving 7 x 5 in image
p
# 2nd option: venn two tissues
library(eulerr)
p <- plot(
eulerr::venn(gene_sets),
fills = params$tissue_pal,
main = "significant gene sets (GSEA)"
)
ggsave(filename = file.path("plots", "04_venn.svg"), p)
## Saving 7 x 5 in image
p
Note: Different to most other plots, the dotplots seem to be created best by running the chunks manually. This results in automatic axis scaling with more entries, so that the labels do not overlap (which is curiously not the case if remotely knitting)!
only 7 gene sets => fits well in the plot
group = "both up"
p_combined <- combined_dotplot(group, fgseaRes.combined)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined
gets adjusted by the function well. The .svg file looks okay
group = "both down"
p_combined <- combined_dotplot(group, fgseaRes.combined)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined
gets adjusted by the function well. The .svg file looks okay
group = "only gastrocnemius"
p_combined <- combined_dotplot(group, fgseaRes.combined, max_label_length = 100)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined
group = "only gastrocnemius"
pathways <- filter(fgseaRes.combined, diff.exp == group)$pathway
gastroc.df <-
filter(fgseaRes.gastroc, pathway %in% pathways) %>%
arrange(NES)
p.gastroc <- fgsea_dotplot(gastroc.df, max_label_length = 100)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, "_single.svg")), p.gastroc)
## Saving 7 x 5 in image
p.gastroc
gets adjusted by the function well. The .svg file looks okay
group = "only soleus"
p_combined <- combined_dotplot(group, fgseaRes.combined, max_label_length = 100, sort_by_gastroc = F)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, ".svg")), p_combined)
## Saving 7 x 5 in image
p_combined
group = "only soleus"
pathways <- filter(fgseaRes.combined, diff.exp == group)$pathway
soleus.df <-
filter(fgseaRes.soleus, pathway %in% pathways) %>%
arrange(NES)
p.soleus <-
fgsea_dotplot(soleus.df, max_label_length = 100)
ggsave(filename = file.path("plots", paste0("04_gsea_dotplot_", group, "_single.svg")), p.soleus)
## Saving 7 x 5 in image
p.soleus
ordering pathways by padj values and using ES to
# ' obtain top pathways ordered by padj and use `ES` for up or down regulation
get_top_pathways <- function(fgseaRes, up = TRUE, pCutoff=params$pCutoff, n=10) {
.updown <- ifelse(up, `>`, `<`)
top.pathways <- fgseaRes %>%
filter(.updown(ES,0), padj < pCutoff) %>%
arrange(padj) %>%
slice_head(n=n)
return(top.pathways)
}
plot for top up and down regulated pathways
# ' plots top n enrichment plots for the given fgsea result
plot_top_enrichment <- function(fgseaRes, pathways, ranks, n = 9, up = TRUE) {
# extracting the top n pathways
top.pathways <- get_top_pathways(fgseaRes, up=up, pCutoff=params$pCutoff, n=n)
plot.list <- list()
# lims <- list("x" = c(0,17000), "y" = c(-0.8,0.0))
for (i in 1:nrow(top.pathways)) {
# filling plot.list with enrichmentPlots
# TODO: how can I use facet_wrap for this?
pathway <- top.pathways[i]$pathway
plt <- plotEnrichment(pathways[[pathway]], ranks) +
# TODO: adjust yaxis to the same scale
# TODO: keep axis.text.x only on the lower row
# TODO: keep axis.text.y only on the right column
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
) # +
# coord_cartesian(xlim = lims$x, ylim = lims$y)
plot.list[[i]] <- plt
}
arrange_plts(plot.list)
}
# ' helper function to arragen the plot from the enrichment
arrange_plts <- function(plt.list) {
nplts <- length(plt.list)
plt <- plt.list[1]
xlab <- plt$labels$x
ylab <- plt$labels$y
# set axis to the same scale
lims <- list("x" = c(0, 17000), "y" = c(-0.8, 0.0))
# remove axis
# arrange the plots
fig_labels <- LETTERS[1:nplts]
patchwork::wrap_plots(plt.list, )
figure <- ggpubr::ggarrange(plotlist = plt.list,
labels = fig_labels) %>%
annotate_figure(left = text_grob(ylab, rot = 90),
bottom = text_grob(xlab))
# TODO: remove all x-axis labels except lower row
# get dimensions
figure$layers
return(figure)
}
# plot_labels <-
# data.frame("label" = LETTERS[1:10], "pathway" = top.pathways)
# knitr::kable(caption = "plot labels", plot_labels)
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=T)
# TODO: add plot labels to return argument of plot_top_enrichment (use list probably)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| B | WP_MICROGLIA_PATHOGEN_PHAGOCYTOSIS_PATHWAY |
| C | WP_APOPTOSIS |
| D | REACTOME_IMMUNOREGULATORY_INTERACTIONS_BETWEEN_A_LYMPHOID_AND_A_NON_LYMPHOID_CELL |
| E | WP_FIBRIN_COMPLEMENT_RECEPTOR_3_SIGNALING_PATHWAY |
| F | WP_CHEMOKINE_SIGNALING_PATHWAY |
| G | BIOCARTA_TNFR2_PATHWAY |
| H | REACTOME_DAP12_INTERACTIONS |
| I | REACTOME_FCGAMMA_RECEPTOR_FCGR_DEPENDENT_PHAGOCYTOSIS |
plot_top_enrichment(fgseaRes.gastroc, CGP, ranks.gastroc, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.gastroc, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_THE_CITRIC_ACID_TCA_CYCLE_AND_RESPIRATORY_ELECTRON_TRANSPORT |
| B | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT_ATP_SYNTHESIS_BY_CHEMIOSMOTIC_COUPLING_AND_HEAT_PRODUCTION_BY_UNCOUPLING_PROTEINS |
| C | REACTOME_RESPIRATORY_ELECTRON_TRANSPORT |
| D | WP_ELECTRON_TRANSPORT_CHAIN |
| E | REACTOME_COMPLEX_I_BIOGENESIS |
| F | REACTOME_MITOCHONDRIAL_TRANSLATION |
| G | REACTOME_KEAP1_NFE2L2_PATHWAY |
| H | REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA |
| I | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=T)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=T, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_SRP_DEPENDENT_COTRANSLATIONAL_PROTEIN_TARGETING_TO_MEMBRANE |
| B | REACTOME_FORMATION_OF_A_POOL_OF_FREE_40S_SUBUNITS |
| C | REACTOME_NONSENSE_MEDIATED_DECAY_NMD_INDEPENDENT_OF_THE_EXON_JUNCTION_COMPLEX_EJC |
| D | WP_CYTOPLASMIC_RIBOSOMAL_PROTEINS |
| E | WP_TYROBP_CAUSAL_NETWORK_IN_MICROGLIA |
| F | REACTOME_EUKARYOTIC_TRANSLATION_INITIATION |
| G | REACTOME_NONSENSE_MEDIATED_DECAY_NMD |
| H | REACTOME_MAJOR_PATHWAY_OF_RRNA_PROCESSING_IN_THE_NUCLEOLUS_AND_CYTOSOL |
| I | REACTOME_PRC2_METHYLATES_HISTONES_AND_DNA |
plot_top_enrichment(fgseaRes.soleus, CGP, ranks.soleus, up=F)
plot_labels <-
data.frame("label" = LETTERS[1:9], "pathway" = get_top_pathways(fgseaRes.soleus, up=F, n=9)$pathway)
knitr::kable(caption = "plot labels", plot_labels)
| label | pathway |
|---|---|
| A | REACTOME_KEAP1_NFE2L2_PATHWAY |
| B | REACTOME_CELLULAR_RESPONSE_TO_CHEMICAL_STRESS |
| C | REACTOME_GLI3_IS_PROCESSED_TO_GLI3R_BY_THE_PROTEASOME |
| D | REACTOME_UBIQUITIN_MEDIATED_DEGRADATION_OF_PHOSPHORYLATED_CDC25A |
| E | REACTOME_RUNX1_REGULATES_TRANSCRIPTION_OF_GENES_INVOLVED_IN_DIFFERENTIATION_OF_HSCS |
| F | REACTOME_CELLULAR_RESPONSE_TO_HYPOXIA |
| G | REACTOME_DEGRADATION_OF_DVL |
| H | REACTOME_ABC_FAMILY_PROTEINS_MEDIATED_TRANSPORT |
| I | REACTOME_ASYMMETRIC_LOCALIZATION_OF_PCP_PROTEINS |
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.gastroc, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.gastroc, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.gastroc,
fgseaRes = fgseaRes.gastroc,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
top significant pathways:
# creating up and down regulated pathway vectors separately to maintain order
topUp <- get_top_pathways(fgseaRes.soleus, up=T, pCutoff = params$pCutoff, n=10)
topDown <- get_top_pathways(fgseaRes.soleus, up=F, pCutoff = params$pCutoff, n=10)
topPathways <- bind_rows(topUp, topDown) %>%
arrange(-NES) %>%
pull(pathway)
plotGseaTable(
pathways = CGP[topPathways],
stats = ranks.soleus,
fgseaRes = fgseaRes.soleus,
gseaParam = 0.5,
render = TRUE
) %>%
ggpubr::as_ggplot() # needed since, for whatever reason only `NULL` gets returned if `plotGseaTable` is rendered inline
pathways will be sorted here by the absolute sum of the NES…
fgseaRes.combined %>%
filter(diff.exp == "both down") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.min.print = 2))
fgseaRes.combined %>%
filter(diff.exp == "both up") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.min.print = 2))
fgseaRes.combined %>%
filter(diff.exp == "only gastrocnemius") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.min.print = 2))
fgseaRes.combined %>%
filter(diff.exp == "only soleus") %>%
mutate(NES = abs(NES.ga) + abs(NES.sol)) %>%
select(pathway, NES) %>%
arrange(desc(NES)) %>%
rmarkdown::paged_table(options = list(cols.min.print = 2))
maxSize (one sided curve)